package cn.omisheep.commons.util.charset.detector;

import cn.omisheep.commons.util.charset.detector.jchardet.nsDetector;
import cn.omisheep.commons.util.charset.detector.jchardet.nsPSMDetector;

import java.io.*;
import java.nio.charset.Charset;

/**
 * Charset detector: static helpers that guess the character set of a string,
 * a file or a byte stream by delegating to the bundled jchardet
 * {@code nsDetector}.
 *
 * @author zhouxinchen[1269670415@qq.com]
 * @since 1.0.8
 */
public class CharsetDetector {

    /** Static utility class; never instantiated. */
    private CharsetDetector() {
        throw new UnsupportedOperationException();
    }

    /**
     * Detects the character set of a string's content.
     * <p>
     * The string is first encoded with {@link String#getBytes()}, i.e. with the
     * platform default charset, and detection then runs on those bytes, so the
     * result depends on that default.
     *
     * @param content the string content to inspect
     * @return the detected charset
     * @throws IOException declared because detection goes through the stream-based overload
     */
    public static Charset detectStr(String content) throws IOException {
        return detect(new ByteArrayInputStream(content.getBytes()));
    }

    /**
     * Detects the character set of a file's content, given the file path.
     *
     * @param path the path of the file
     * @return the detected charset
     * @throws IOException if the file cannot be opened or read
     */
    public static Charset detect(String path) throws IOException {
        return detect(new File(path));
    }

    /**
     * Detects the character set of a file's content.
     * The file is opened for reading and closed again before this method returns.
     *
     * @param file the file to inspect
     * @return the detected charset
     * @throws IOException if the file cannot be opened or read
     */
    public static Charset detect(File file) throws IOException {
        try (FileInputStream stream = new FileInputStream(file)) {
            return detect(stream);
        }
    }

    /**
     * Detects the character set of a stream's content, using
     * {@code nsPSMDetector.ALL} as the language flag.
     *
     * @param inputStream the stream to inspect; it is read but not closed
     * @return the detected charset
     * @throws IOException if reading from the stream fails
     */
    public static Charset detect(InputStream inputStream) throws IOException {
        return detect(inputStream, nsPSMDetector.ALL);
    }

    /**
     * Detects the character set of a stream's content.
     * <p>
     * The stream is read in 1024-byte chunks. Reading stops at end of stream or
     * as soon as the detector signals that it is done, so the stream may be left
     * only partially consumed. The stream is never closed by this method.
     * <p>
     * The result is chosen in this order: {@code "ASCII"} when every chunk read
     * was reported as ASCII by the detector (this includes an empty stream);
     * otherwise the name reported through the detector's observer callback;
     * otherwise the first entry of {@code getProbableCharsets()}.
     *
     * @param inputStream the stream to inspect; it is read but not closed
     * @param langFlag    language flag handed to the {@code nsDetector} constructor
     *                    (for example {@code nsPSMDetector.ALL})
     * @return the detected charset
     * @throws IOException if reading from the stream fails
     * @throws java.nio.charset.IllegalCharsetNameException if the name produced by the
     *         detector is not a legal charset name
     * @throws java.nio.charset.UnsupportedCharsetException if the name produced by the
     *         detector is not supported by this JVM
     */
    public static Charset detect(InputStream inputStream,
                                 int langFlag) throws IOException {
        // Single-element arrays act as mutable holders the observer lambda can write to.
        final boolean[] reported     = {false};
        final String[]  reportedName = new String[1];
        nsDetector      detector     = new nsDetector(langFlag);
        detector.Init(name -> {
            reported[0]     = true;
            reportedName[0] = name;
        });

        byte[]  chunk     = new byte[1024];
        boolean asciiOnly = true;
        int     read;

        while ((read = inputStream.read(chunk, 0, chunk.length)) != -1) {
            if (asciiOnly) {
                asciiOnly = detector.isAscii(chunk, read);
            }
            // Once the detector reports it is done, no later chunk would be handed
            // to it anyway, so stop reading instead of draining the rest of the stream.
            if (!asciiOnly && detector.DoIt(chunk, read, false)) {
                break;
            }
        }
        detector.DataEnd();

        if (asciiOnly) {
            return Charset.forName("ASCII");
        }
        if (reported[0]) {
            return Charset.forName(reportedName[0]);
        }
        // No definite answer was reported: fall back to the detector's first candidate.
        // NOTE(review): if that entry is not a real charset name, Charset.forName throws;
        // confirm what getProbableCharsets() yields when nothing matches.
        return Charset.forName(detector.getProbableCharsets()[0]);
    }

}
